# Source of Ratings: https://www.whoscored.com/ - date: 22/4/22

# Load the scraped ratings file. Strings are kept as character vectors rather
# than factors; FALSE is spelled out because the shorthand `F` can be reassigned.
data <- read.csv("Original_Scrape.csv", stringsAsFactors = FALSE)

## Perfect Scores

# Appearances whose Rating text is exactly "10.00", shown as an interactive
# table.
Perfect_Scores <- filter(data, Rating == "10.00")
DT::datatable(Perfect_Scores)
## Cleaning

# Build the cleaned appearance table:
#   * split "Player.1" at the comma into the name text and starting position,
#   * drop appearances whose rating is the "-" placeholder,
#   * derive Name2 / age / NameFinal from the name text,
#   * split "date" at the comma into a day part and a date part.
data_clean <- data %>%
  separate(Player.1, into = c("Name", "Starting_Position"), sep = ",") %>%
  filter(Rating != "-") %>%
  mutate(
    Rating = as.numeric(Rating),
    Starting_Position = trimws(Starting_Position),
    # Name text with every "(...)" group removed.
    Name2 = gsub("\\(([^()]+)\\)", "", Name),
    # First run of digits remaining in the name text, stored as the age.
    age = as.numeric(str_extract(Name2, pattern = "[:digit:]+")),
    # Name text without digits and without surrounding whitespace.
    NameFinal = trimws(removeNumbers(Name2))
  ) %>%
  separate(date, into = c("day", "date"), sep = ",") %>%
  mutate(
    day = trimws(day),
    date = trimws(date)
  )

# Parenthesised "(...)" fragments found in each raw name, as a data frame.
# NOTE(review): sub_mins is not referenced again in the visible script —
# confirm whether it is still needed.
sub_mins <- str_extract_all(
  data_clean$Name,
  pattern = "\\(([^()]+)\\)",
  simplify = TRUE
) %>%
  data.frame()

# Per-player summary columns, added to every appearance row:
#   numberofmatches - number of rated appearances for the player
#   mean            - the player's average rating
#   deviation       - this appearance's rating minus the player's mean
#   square          - squared deviation
#   square_sum      - sum of squared deviations for the player
#   variance        - sample variance (n - 1 denominator); a player with a
#                     single appearance gets 0 / 0 = NaN
# Everything is computed in one grouped pass and the grouping is dropped
# afterwards so later steps never run on a stale group_by().
data_clean <- data_clean %>%
  group_by(NameFinal) %>%
  mutate(
    numberofmatches = n(),
    mean = mean(Rating),
    deviation = Rating - mean,
    square = deviation^2,
    square_sum = sum(square)
  ) %>%
  ungroup() %>%
  mutate(variance = square_sum / (numberofmatches - 1))

# Distinct rows of the player-level summary columns.
stats <- unique(
  select(data_clean, NameFinal, variance, mean, numberofmatches, age, squad)
)

# Same summary, with each starting position mapped to a broader position
# group. Positions not listed below get NA.
stats_pos <- data_clean %>%
  mutate(
    position = case_when(
      Starting_Position %in% c("AMC", "AML", "AMR") ~ "Attacking Midfielders",
      Starting_Position %in% c("DC", "DL", "DR") ~ "Defenders",
      Starting_Position %in% c("DMC", "DML", "DMR") ~ "Defensive Midfielders",
      Starting_Position %in% c("FW", "FWL", "FWR") ~ "Forwards",
      Starting_Position %in% c("GK") ~ "GoalKeepers",
      Starting_Position %in% c("MC", "MR", "ML") ~ "Midfielders",
      Starting_Position %in% c("Sub") ~ "Subs"
    )
  ) %>%
  select(NameFinal, variance, mean, numberofmatches, age, squad, position) %>%
  unique()

# Restrict both summaries to players with at least 10 appearances.
stats_10 <- filter(stats, numberofmatches >= 10)

stats_pos_10 <- filter(stats_pos, numberofmatches >= 10)

## Graphs

# Corner labels reused by the mean-vs-variance scatter plots. The +/-Inf
# coordinates place each label at a panel corner; hjust/vjust nudge the text
# so it stays inside the panel.
annotations <- data.frame(
  xpos = c(-Inf, -Inf, Inf, Inf),
  ypos = c(-Inf, Inf, -Inf, Inf),
  annotateText = c(
    "Consistently Shit", "Inconsistently shit?",
    "Consistently Good", "Inconsistently Good?"
  ),
  hjustvar = c(0, 0, 1, 1),
  vjustvar = c(-0.5, 1, -0.5, 1)
)

# Average rating vs variance for players with at least 10 appearances.
ggplot(data = stats_10, mapping = aes(x = mean, y = variance)) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance",
    subtitle = "At least 10 appearances (inc  Subs)"
  )
# Per-player statistics recomputed from starting appearances only
# (rows with Starting_Position "Sub" are dropped first), then reduced to the
# distinct player-level summary rows.
data_clean_no_subs <- data_clean %>%
  filter(Starting_Position != "Sub") %>%
  group_by(NameFinal) %>%
  mutate(
    numberofmatches = n(),
    mean = mean(Rating),
    deviation = Rating - mean,
    square = deviation^2,
    square_sum = sum(square)
  ) %>%
  ungroup() %>%
  # Sample variance (n - 1 denominator) over the player's starts.
  mutate(variance = square_sum / (numberofmatches - 1)) %>%
  select(NameFinal, variance, mean, numberofmatches, age, squad) %>%
  unique()

# Players with at least 10 starts. The plot below previously drew stats_10,
# which includes substitute appearances, so it contradicted its own
# "(exc Subs)" subtitle and simply repeated the first chart.
stats_no_subs_10 <- data_clean_no_subs %>%
  filter(numberofmatches > 9)

ggplot(data = stats_no_subs_10, mapping = aes(x = mean, y = variance)) +
  geom_point() +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance",
    subtitle = "At least 10 appearances (exc Subs)"
  ) +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  )
# Per-player statistics recomputed from substitute appearances only.
Subs_version <- data_clean %>%
  filter(Starting_Position == "Sub") %>%
  group_by(NameFinal) %>%
  mutate(
    numberofmatches = n(),
    mean = mean(Rating),
    deviation = Rating - mean,
    square = deviation^2,
    square_sum = sum(square)
  ) %>%
  ungroup() %>%
  mutate(variance = square_sum / (numberofmatches - 1))

# Distinct player-level rows, then only players with at least 5 sub
# appearances.
stats_Subs <- unique(
  select(Subs_version, NameFinal, variance, mean, numberofmatches, age, squad)
)

stats_Subs_5 <- filter(stats_Subs, numberofmatches >= 5)

ggplot(data = stats_Subs_5, mapping = aes(x = mean, y = variance)) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-.1, 1) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance",
    subtitle = "At least 5 sub appearances"
  )
# Mean rating vs variance for the "Attacking Midfielders" rows of
# stats_pos_10.
ggplot(
  data = filter(stats_pos_10, position == "Attacking Midfielders"),
  mapping = aes(x = mean, y = variance)
) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-.1, 1.3) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Attacking Midfielders",
    subtitle = "At least 10 appearances"
  )
# Mean rating vs variance for the "Defenders" rows of stats_pos_10.
ggplot(
  data = filter(stats_pos_10, position == "Defenders"),
  mapping = aes(x = mean, y = variance)
) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-.1, 1.3) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Defenders",
    subtitle = "At least 10 appearances"
  )
# Mean rating vs variance for the "Defensive Midfielders" rows of
# stats_pos_10.
ggplot(
  data = filter(stats_pos_10, position == "Defensive Midfielders"),
  mapping = aes(x = mean, y = variance)
) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-.1, 1.3) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Defensive Midfielders",
    subtitle = "At least 10 appearances"
  )
# Mean rating vs variance for the "Forwards" rows of stats_pos_10
# (this chart uses a taller y-range than the other position charts).
ggplot(
  data = filter(stats_pos_10, position == "Forwards"),
  mapping = aes(x = mean, y = variance)
) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-.1, 2.3) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Forwards",
    subtitle = "At least 10 appearances"
  )

# Mean rating vs variance for the "GoalKeepers" rows of stats_pos_10.
ggplot(
  data = filter(stats_pos_10, position == "GoalKeepers"),
  mapping = aes(x = mean, y = variance)
) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-.1, 1.3) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - GoalKeepers",
    subtitle = "At least 10 appearances"
  )
# Mean rating vs variance for the "Midfielders" rows of stats_pos_10.
ggplot(
  data = filter(stats_pos_10, position == "Midfielders"),
  mapping = aes(x = mean, y = variance)
) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal)) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-.1, 1.3) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance - Midfielders",
    subtitle = "At least 10 appearances"
  )
# Mean rating vs variance for players with at least 10 appearances, one
# panel per squad stacked in a single column.
ggplot(stats_10, aes(x = mean, y = variance)) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal), size = 3) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  facet_wrap(~squad, ncol = 1) +
  ylim(-0.1, 1.1) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance By Teams",
    subtitle = "At least 10 appearances"
  )

## Players In positions

# Statistics per player, starting position and squad (substitute appearances
# excluded):
#   numberofmatches_in_position - starts in this position for this squad
#   percentageinpostion         - that count as a share of the player's total
#                                 appearances (numberofmatches from data_clean),
#                                 rounded to 2 decimals
#   mean / deviation / square / square_sum - recomputed within the group
#   variance                    - sample variance within the group
# The variance denominator is the group's own size minus one. It previously
# used the player's overall appearance count, which divided a within-position
# sum of squares by an unrelated, larger n and understated the variance.
data_clean_positions <- data_clean %>%
  filter(Starting_Position != "Sub") %>%
  group_by(NameFinal, Starting_Position, squad) %>%
  mutate(
    numberofmatches_in_position = n(),
    percentageinpostion = round(numberofmatches_in_position / numberofmatches, 2),
    mean = mean(Rating),
    deviation = Rating - mean,
    square = deviation^2,
    square_sum = sum(square),
    variance = square_sum / (numberofmatches_in_position - 1)
  ) %>%
  ungroup()

# One row per player/position/squad, keeping only combinations with at least
# 5 starts in that position.
stats_postions2 <- data_clean_positions %>%
  select(
    NameFinal, variance, mean, numberofmatches, age, squad,
    Starting_Position, numberofmatches_in_position, percentageinpostion
  ) %>%
  unique() %>%
  filter(numberofmatches_in_position > 4)


# Variance against the share of a player's matches spent in the position.
ggplot(
  data = stats_postions2,
  mapping = aes(x = percentageinpostion, y = variance)
) +
  geom_point() +
  theme_classic() +
  labs(
    x = "% of Matches in Position",
    y = "Variance",
    title = "Variance vs % of matches in Position",
    subtitle = "At Least 5 matches in a position"
  )

# Mean rating vs variance from stats_postions2, one panel per starting
# position stacked in a single column.
ggplot(stats_postions2, aes(x = mean, y = variance)) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal), size = 3) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  facet_wrap(~Starting_Position, ncol = 1) +
  ylim(-0.1, 1.1) +
  theme_classic() +
  labs(
    x = "Average Rating",
    y = "Variance",
    title = "Average Rating v Variance By Position",
    subtitle = "At least 5 appearances in position"
  )
# One row per player and starting position (subs excluded), carrying the
# player's overall mean/variance, plus the number of different positions the
# player has started in. Only players with at least 10 appearances are kept.
stats_positions3 <- data_clean %>%
  select(NameFinal, variance, mean, numberofmatches, age, Starting_Position) %>%
  filter(Starting_Position != "Sub") %>%
  unique() %>%
  group_by(NameFinal) %>%
  mutate(
    # Count distinct positions explicitly so the figure cannot be inflated by
    # rows that differ only in another selected column (e.g. age).
    different_position = n_distinct(Starting_Position),
    # Text version used as the facet label. Converting directly also labels
    # counts above 8, which the former hard-coded 1..8 mapping turned into NA.
    different_position_text = as.character(different_position)
  ) %>%
  ungroup() %>%
  filter(numberofmatches > 9)

# Mean rating vs variance, one panel per number of positions played.
# The subtitle now states the threshold the filter above actually applies.
ggplot(data = stats_positions3) +
  geom_point(aes(x = mean, y = variance)) +
  theme_classic() +
  labs(
    title = "Checking consistancy when playing in multiple positions",
    subtitle = "At least 10 Matches"
  ) +
  facet_wrap(~different_position_text, ncol = 2)

### It seems that players who play in more positions are more consistent but rate lower

## Variance V Top 6

# Opponents counted as the "top 6".
top6 <- c(
  "Arsenal", "Chelsea", "Liverpool",
  "Manchester City", "Manchester United", "Tottenham"
)

# Per-player statistics recomputed from appearances against those opponents
# only, keeping players with at least 5 such appearances.
top6_data_clean <- data_clean %>%
  filter(opp %in% top6) %>%
  group_by(NameFinal) %>%
  mutate(
    numberofmatches = n(),
    mean = mean(Rating),
    deviation = Rating - mean,
    square = deviation^2,
    square_sum = sum(square),
    variance = square_sum / (numberofmatches - 1)
  ) %>%
  ungroup() %>%
  filter(numberofmatches >= 5)

# Distinct player-level summary rows over all appearances in data_clean.
stats <- unique(
  select(data_clean, NameFinal, variance, mean, numberofmatches, age, squad)
)

# Distinct player-level summary rows for the top-6 subset.
top6_stats <- unique(
  select(top6_data_clean, NameFinal, variance, mean, numberofmatches)
)


# Mean rating vs variance for appearances against the top 6.
ggplot(top6_stats, aes(x = mean, y = variance)) +
  geom_point() +
  geom_text_repel(aes(label = NameFinal), box.padding = 0.1) +
  geom_text(
    data = annotations,
    aes(
      x = xpos, y = ypos,
      hjust = hjustvar, vjust = vjustvar,
      label = annotateText
    ),
    color = "red"
  ) +
  ylim(-0.1, 3.5) +
  theme_classic() +
  labs(
    title = "Variance v Average Ratings against top 6",
    subtitle = "At least 5 Appearances",
    x = "Average Ratings",
    y = "Variance"
  )
## Warning: ggrepel: 267 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps